1 Objective

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(ComplexUpset)
library(ggplot2)
library(plyr)

set.seed(173)
options(scipen=999)

2.2 Parameters

# Paths
path_to_obj <- here::here("scATAC-seq/results/R_objects/8.tonsil_atac_integrated_with_multiome_annotated_level1.rds")
path_to_macs2 <- here::here("scATAC-seq/results/files/1.MACS_annotation_level_1.tsv")

3 Preliminary annotation level 1

tonsil.atac = readRDS(path_to_obj)
tonsil.atac
## An object of class Seurat 
## 166293 features across 120929 samples within 1 assay 
## Active assay: peaks (166293 features, 166293 variable features)
##  3 dimensional reductions calculated: lsi, umap, harmony
DimPlot(
  tonsil.atac, 
  group.by = "annotation_level_1",
  cols = c("#a6cee3", "#1f78b4","#b2df8a", 
             "#33a02c", "#fb9a99","#e31a1c", 
             "#fdbf6f", "#ff7f00","#cab2d6",
             "#6a3d9a","#ffff99"),
  pt.size = 0.1
)

4 Spot potential doublets

4.1 Scrublet prediction

FeaturePlot(tonsil.atac, "scrublet_doublet_scores_atac")

4.2 QC metrics

qc_vars <- c(
  "nCount_ATAC",
  "nFeature_ATAC"
  )

qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(tonsil.atac, features = x)
  p
})
qc_gg
## [[1]]

## 
## [[2]]

4.3 Purity Score

4.3.1 Intersection peaks

peaks = read.table(path_to_macs2)
peaks_freq = data.frame(table(peaks$peak_called_in))
nrow(peaks_freq)
## [1] 19995
data_filter = peaks_freq[peaks_freq$Freq > 50,]
nrow(data_filter)
## [1] 406
dat <- peaks_freq
groups <- as.character(levels(tonsil.atac$annotation_level_1))
subsets <- peaks$peak_called_in

mat <- map_dfc(subsets, str_detect, groups) %>%
    data.frame() %>%
    t() %>% # transpose the result, ugh
    as_tibble()
colnames(mat)  <- groups
mat$count <- dat$count
options(repr.plot.width=20, repr.plot.height=10)

ComplexUpset::upset(data = mat, intersect = groups,
      name="Peaks Groupings by Cell Type", 
      min_size = 1000,
      width_ratio = 0.125) 

4.3.2 Purity score

From the intersection data, we decided to create a purity score to asses how many a peak are unique or share across cell type.

purity_score <- function(peaks_freq = peaks_freq, cluster_type)
{
  cluster = peaks_freq[grep(cluster_type, peaks_freq$Var1),]
  cluster_sharing= nchar(gsub('[^,]', '', cluster[grep(cluster_type, cluster$Var1),]$Var1))+1
  cluster$peaks_cluster = cluster$Freq * cluster_sharing
  PS = cluster[cluster$Var1 == cluster_type,]$peaks_cluster / sum(cluster[cluster$Var1 != 1,]$peaks_cluster)
  return(PS)
  }
cluster_types = levels(tonsil.atac$annotation_level_1)
ps_values = c()

for (cluster in cluster_types) 
{
 ps_value = purity_score(peaks_freq = peaks_freq, cluster)
 ps_values = c(ps_values,ps_value)
}

df = data.frame(cluster_types,ps_values)
df$ps_values = round(df$ps_values,4)
colnames(df) <- c("seurat_clusters","purity_score")
 
ggplot(data=df, aes(x=as.factor(cluster_types), y=ps_values/sum(ps_values))) +
  geom_bar(stat="identity") + theme_minimal()

tonsil.atac@meta.data$purity_score <- revalue(
  tonsil.atac@meta.data$annotation_level_1,
  c(
    "NBC_MBC" = 0.0198,
    "GCBC" = 0.0525,
    "PC" = 0.0107,
    "CD4_T" = 0.0248,
    "Cytotoxic" = 0.0085,
    "myeloid" = 0.0246,
    "FDC" = 0.0839,
    "PDC" = 0.0102,
    "epithelial" = 0.0744
  )
)

tonsil.atac@meta.data$purity_score <- log10(as.numeric(as.character(tonsil.atac@meta.data$purity_score)))
options(repr.plot.width=10, repr.plot.height=10)
FeaturePlot(tonsil.atac, "purity_score",cols = c("red", "blue"))

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          ggplot2_3.3.2       ComplexUpset_0.5.20 stringr_1.4.0       purrr_0.3.4         tidyr_1.1.2         dplyr_1.0.2         Signac_1.1.0.9000   Seurat_3.9.9.9010   BiocStyle_2.16.1   
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.18             tidyselect_1.1.0            RSQLite_2.2.1               AnnotationDbi_1.50.3        htmlwidgets_1.5.2           grid_4.0.3                  BiocParallel_1.22.0         Rtsne_0.15                  munsell_0.5.0               codetools_0.2-17            ica_1.0-2                   future_1.20.1               miniUI_0.1.1.1              withr_2.3.0                 colorspace_2.0-0            Biobase_2.48.0              OrganismDbi_1.30.0          knitr_1.30                  rstudioapi_0.12             stats4_4.0.3                ROCR_1.0-11                 tensor_1.5                  listenv_0.8.0               labeling_0.4.2              GenomeInfoDbData_1.2.3      polyclip_1.10-0             farver_2.0.3                bit64_4.0.5                 rprojroot_2.0.2             parallelly_1.21.0           vctrs_0.3.4                 generics_0.1.0              xfun_0.18                   biovizBase_1.36.0           BiocFileCache_1.12.1        lsa_0.73.2                  ggseqlogo_0.1               R6_2.5.0                    GenomeInfoDb_1.24.0         rsvd_1.0.3                  AnnotationFilter_1.12.0     bitops_1.0-6               
##  [43] spatstat.utils_1.17-0       reshape_0.8.8               DelayedArray_0.14.0         assertthat_0.2.1            promises_1.1.1              scales_1.1.1                nnet_7.3-14                 gtable_0.3.0                globals_0.13.1              goftest_1.2-2               ggbio_1.36.0                ensembldb_2.12.1            rlang_0.4.8                 RcppRoll_0.3.0              splines_4.0.3               rtracklayer_1.48.0          lazyeval_0.2.2              dichromat_2.0-0             checkmate_2.0.0             BiocManager_1.30.10         yaml_2.2.1                  reshape2_1.4.4              abind_1.4-5                 GenomicFeatures_1.40.1      backports_1.2.0             httpuv_1.5.4                Hmisc_4.4-1                 RBGL_1.64.0                 tools_4.0.3                 bookdown_0.21               ellipsis_0.3.1              RColorBrewer_1.1-2          BiocGenerics_0.34.0         ggridges_0.5.2              Rcpp_1.0.5                  base64enc_0.1-3             progress_1.2.2              zlibbioc_1.34.0             RCurl_1.98-1.2              prettyunits_1.1.1           rpart_4.1-15                openssl_1.4.3              
##  [85] deldir_0.2-3                pbapply_1.4-3               cowplot_1.1.0               S4Vectors_0.26.0            zoo_1.8-8                   SummarizedExperiment_1.18.1 ggrepel_0.8.2               cluster_2.1.0               here_1.0.1                  magrittr_1.5                data.table_1.13.2           lmtest_0.9-38               RANN_2.6.1                  SnowballC_0.7.0             ProtGenerics_1.20.0         fitdistrplus_1.1-1          matrixStats_0.57.0          hms_0.5.3                   patchwork_1.1.0             mime_0.9                    evaluate_0.14               xtable_1.8-4                XML_3.99-0.3                jpeg_0.1-8.1                IRanges_2.22.1              gridExtra_2.3               compiler_4.0.3              biomaRt_2.44.4              tibble_3.0.4                KernSmooth_2.23-17          crayon_1.3.4                htmltools_0.5.0             mgcv_1.8-33                 later_1.1.0.1               Formula_1.2-4               DBI_1.1.0                   tweenr_1.0.1                dbplyr_1.4.4                MASS_7.3-53                 rappdirs_0.3.1              Matrix_1.2-18               parallel_4.0.3             
## [127] igraph_1.2.6                GenomicRanges_1.40.0        pkgconfig_2.0.3             GenomicAlignments_1.24.0    foreign_0.8-80              plotly_4.9.2.1              xml2_1.3.2                  XVector_0.28.0              VariantAnnotation_1.34.0    digest_0.6.27               sctransform_0.3.1           RcppAnnoy_0.0.16            graph_1.66.0                spatstat.data_1.4-3         Biostrings_2.56.0           rmarkdown_2.5               leiden_0.3.5                fastmatch_1.1-0             htmlTable_2.1.0             uwot_0.1.8.9001             curl_4.3                    shiny_1.5.0                 Rsamtools_2.4.0             lifecycle_0.2.0             nlme_3.1-150                jsonlite_1.7.1              viridisLite_0.3.0           askpass_1.1                 BSgenome_1.56.0             pillar_1.4.6                lattice_0.20-41             GGally_2.0.0                fastmap_1.0.1               httr_1.4.2                  survival_3.2-7              glue_1.4.2                  spatstat_1.64-1             png_0.1-7                   bit_4.0.4                   ggforce_0.3.2               stringi_1.5.3               blob_1.2.1                 
## [169] latticeExtra_0.6-29         memoise_1.1.0               irlba_2.3.3                 future.apply_1.6.0